##############################
##     run bootstrap on     ##
##  cleaned cces and output ##
## matrices of distribution ##
##       of opinion by      ##
##        party/class       ##
##############################

# Start from an empty workspace: at the end of the script every object whose
# name starts with all_/rep_/dem_ is collected into the saved output.
rm(list = ls())

# setwd('~/../../Class+Party/')

# Packages come from the project-local library; helper functions used below
# (subset_unique_questions, boot_sample_cces) are presumably defined in
# bootstrap_funs.R.
.libPaths('../rpackages')
library(tidyverse)
source('bootstrap_funs.R')

# Silence the "summarise() has grouped output by ..." messages.
options(dplyr.summarise.inform = FALSE)

# Load the cleaned CCES data without the 2010 wave, reduce it with
# subset_unique_questions(), and recode pid3 so that only respondents whose
# pid7 answer is a (strong / not very strong) identification with the same
# party stay Democrat or Republican; leaners are moved to Independent.
# NOTE(review): a respondent whose pid3 and pid7 name opposite parties
# (e.g. pid3 'Democrat', pid7 'Strong Republican') matches none of the first
# three conditions and keeps the pid3 label -- confirm this is intended.
cces_allyears <- readRDS('cces_allyears.rds') %>%
  filter(year != 2010) %>%
  subset_unique_questions() %>%
  mutate(
    pid3 = case_when(
      pid3 == 'Democrat' &
        pid7 %in% c('Strong Democrat', 'Not very strong Democrat') ~ 'Democrat',
      pid3 == 'Republican' &
        pid7 %in% c('Strong Republican', 'Not very strong Republican') ~ 'Republican',
      pid3 != 'Independent' &
        pid7 %in% c('Lean Democrat', 'Lean Republican') ~ 'Independent',
      TRUE ~ pid3
    )
  )

# One row per question/year/topic combination, ordered by question.
issuetopics <- cces_allyears %>%
  select(question, year, pap_topic, topic_3, topic_6) %>%
  distinct() %>%
  arrange(question)

# Number of bootstrap draws; each draw fills one column of every matrix.
n_boot <- 1000

# Names of the result matrices: <party>_<subgroup>_boot for every combination
# of party (dem / rep / all) and demographic subgroup.
output_tables <- expand.grid(party = c('dem', 'rep', 'all'),
                             var = c('all',
                                     'inctop', 'incmiddle', 'incbottom',
                                     'racewhite', 'raceblack', 'racehisp',
                                     'sexmale', 'sexfemale',
                                     'urban', 'rural',
                                     'religbornagain', 'relignotbornagain',
                                     'religchurchweekly', 'religchurchnever',
                                     'educnohs', 'educhs', 'educcoll',
                                     'ageold', 'ageyoung'))

output_tables <- paste(output_tables$party, output_tables$var, 'boot', sep = '_')

# Create one empty (all-NA) questions x draws matrix per name in the workspace.
# Row names are taken from issuetopics$question as-is: issuetopics is already
# ordered with dplyr::arrange(question), the same ordering the bootstrap loop
# uses when it fills rows.  Re-sorting with base sort() follows the session
# locale and can order names differently from arrange(), which would attach
# row labels to the wrong questions.
for (i in output_tables) {
  assign(i, matrix(nrow = nrow(issuetopics),
                   ncol = n_boot,
                   dimnames = list(as.character(issuetopics$question),
                                   seq_len(n_boot))))
}


set.seed(12345)
ptm <- proc.time()

# Progress marks: a full line (draw number, elapsed minutes) about every 10% of
# draws and a bare '|' about every 1%.  The intervals are floored at 1 because
# for n_boot < 100 the original `s %% floor(n_boot / 100)` is `s %% 0` (NaN),
# which makes the `if` condition NA and stops the run.
tick_major <- max(1, floor(n_boot / 10))
tick_minor <- max(1, floor(n_boot / 100))

# Matrix-name prefix -> party label used in the summaries.
party_labels <- c(dem = 'Democrat', rep = 'Republican', all = 'All')

# One entry per analysis: the data column that defines the subgroups and, for
# each matrix-name suffix, the value of that column it corresponds to (compared
# as character, so 0/1 indicators are written '0'/'1').  `column = NULL` is the
# pooled analysis with no demographic split.
cleavages <- list(
  list(column = NULL,
       levels = c(all = 'all')),
  list(column = 'faminc_group',
       levels = c(inctop = 'top', incmiddle = 'middle', incbottom = 'bottom')),
  list(column = 'race_new',
       levels = c(racewhite = 'White', raceblack = 'Black', racehisp = 'Hispanic')),
  list(column = 'educ_new',
       levels = c(educnohs = 'nohs', educhs = 'highschool', educcoll = 'college')),
  list(column = 'female',
       levels = c(sexfemale = '1', sexmale = '0')),
  list(column = 'bornagain',
       levels = c(religbornagain = '1', relignotbornagain = '0')),
  list(column = 'church',
       levels = c(religchurchweekly = 'weekly', religchurchnever = 'never')),
  list(column = 'agegrp',
       levels = c(ageold = 'old', ageyoung = 'young')),
  list(column = 'urbanrural',
       levels = c(urban = 'urban', rural = 'rural'))
)

# Mean opinion per question for Democrats, Republicans and all respondents
# (including independents and missing party), within the requested levels of
# one demographic column.
#   dat    : one bootstrap sample with columns pid_new, question, opinion
#   column : name of the demographic column, or NULL for no split
#   keep   : values of `column` (as character) to keep; other rows, including
#            rows where `column` is NA, are dropped
# Returns a long table with columns party, cleavage_level, question, opinion.
party_means <- function(dat, column = NULL, keep = NULL) {
  if (is.null(column)) {
    dat$cleavage_level <- rep('all', nrow(dat))
  } else {
    dat$cleavage_level <- as.character(dat[[column]])
    dat <- dat[dat$cleavage_level %in% keep, ]
  }

  # the comparison also drops rows with missing pid_new
  partisans <- dat %>%
    filter(pid_new != 'Independent/Other') %>%
    group_by(pid_new, cleavage_level, question) %>%
    summarize(opinion = sum(opinion) / n()) %>%
    ungroup() %>%
    rename(party = pid_new)

  everyone <- dat %>%
    group_by(cleavage_level, question) %>%
    summarize(opinion = sum(opinion) / n()) %>%
    ungroup() %>%
    mutate(party = 'All')

  bind_rows(partisans, everyone)
}

# Collect the pre-allocated result matrices into a list for the duration of the
# loop; mget() fails here if a matrix named by `cleavages` was never created.
# The filled matrices are written back to the workspace after the loop (if the
# run is interrupted, partial results are in `boot_store`).
boot_targets <- unlist(lapply(cleavages, function(grp) {
  as.vector(outer(names(party_labels), names(grp$levels), paste, sep = '_'))
}))
boot_targets <- paste(boot_targets, 'boot', sep = '_')
boot_store <- mget(boot_targets, envir = environment())

for (s in seq_len(n_boot)) {
  if (s %% tick_major == 0) {
    cat('|', s, (proc.time() - ptm)['elapsed'] / 60, '\n')
  } else if (s %% tick_minor == 0) {
    cat('|')
  }

  boot_cces <- boot_sample_cces(cces_allyears, quantile = 0.1)

  # pid_new: Republican / Democrat as in pid3, Independent and Other pooled,
  # anything else NA; pid3 itself is then dropped
  is_partisan <- boot_cces$pid3 %in% c('Republican', 'Democrat')
  boot_cces$pid_new <- NA
  boot_cces$pid_new[is_partisan] <- boot_cces$pid3[is_partisan]
  boot_cces$pid_new[boot_cces$pid3 %in% c('Independent', 'Other')] <- 'Independent/Other'
  boot_cces <- boot_cces[, names(boot_cces) != 'pid3']

  boot_cces <- boot_cces %>%
    drop_na(opinion)

  # derived subgroup columns; 'middle' is assigned last, so it wins if several
  # income indicators equal 1
  boot_cces$faminc_group <- NA
  boot_cces$faminc_group[boot_cces$faminc_bottom == 1] <- 'bottom'
  boot_cces$faminc_group[boot_cces$faminc_top == 1] <- 'top'
  boot_cces$faminc_group[boot_cces$faminc_middle == 1] <- 'middle'

  boot_cces <- boot_cces %>%
    mutate(educ_new = case_when(educ == 'No HS' ~ 'nohs',
                                educ %in% c('High school graduate', 'Some college', '2-year') ~ 'highschool',
                                educ %in% c('4-year', 'Post-grad') ~ 'college'))

  for (grp in cleavages) {
    means <- party_means(boot_cces, grp$column, grp$levels)

    for (suffix in names(grp$levels)) {
      for (party in names(party_labels)) {
        target <- paste(party, suffix, 'boot', sep = '_')
        cell <- means[means$party == party_labels[[party]] &
                        means$cleavage_level == grp$levels[[suffix]], ]

        # as in the original script, a subgroup that is entirely absent from a
        # draw is an error rather than a silently empty column
        if (nrow(cell) == 0) {
          stop('bootstrap draw ', s, ': no observations for ', target,
               call. = FALSE)
        }

        # write by question name, not position: the result no longer depends
        # on the row order of the summary matching the row order of the matrix,
        # and questions missing from this subgroup simply stay NA
        boot_store[[target]][as.character(cell$question), s] <- cell$opinion
      }
    }
  }
}

# put the filled matrices back under their original names
list2env(boot_store, envir = environment())

# total run time in minutes
print((proc.time() - ptm) / 60)

# Named list holding issuetopics plus every workspace object whose name starts
# with all_, rep_ or dem_ (the bootstrap matrices).  mget() always returns a
# named list, whereas sapply(..., get) only did so because the elements happen
# to have different lengths.
boot_names <- objects(pattern = '^(all|rep|dem)\\_')
output <- mget(c('issuetopics', boot_names), envir = environment())

# make sure the target directory exists so the finished run is not lost to a
# failed write
dir.create('bootstrap_out', showWarnings = FALSE, recursive = TRUE)

saveRDS(output,
        'bootstrap_out/opinion_bootstrapped_noleaners_allcleavages.RDS')
